Observed log-chlorophyll at representative station in SF Bay Delta region.
library(tidyverse)
library(lubridate)
library(mgcv)
library(plotly)
# flow data, left moving window average of 30 days
data(flow_dat)
fl_dat <- flow_dat %>%
filter(station %in% 'sjr') %>%
mutate(
qsm = stats::filter(q, rep(1, 30)/30, sides = 1, method = 'convolution')
)
# format the data to model
data(sf_dat)
sf_sub <- sf_dat %>%
filter(Site_Code %in% 'P8') %>%
mutate(
doy = yday(Date),
decyr = decimal_date(Date),
yr = year(Date),
mo = month(Date, label = T)
) %>%
left_join(fl_dat, by = 'Date') %>%
mutate(
lnQ = log(qsm),
lnchl = log(chl)
) %>%
select(-q, -qsm, -station, -Latitude, -Longitude, -Location)
# plot, all
p <- ggplot(sf_sub, aes(x = Date, y = lnchl)) +
geom_line() +
theme_bw()
ggplotly(p)
# boxplot, by years
p <- ggplot(sf_sub, aes(x = yr, y = lnchl)) +
geom_boxplot() +
theme_bw()
ggplotly(p)
# boxplot, by month
p <- ggplot(sf_sub, aes(x = mo, y = lnchl)) +
geom_boxplot() +
theme_bw()
ggplotly(p)
Some simple GAMs to explore annual, seasonal trends.
# annual only
mod1 <- gam(lnchl ~ s(decyr, bs = 'tp'),
data = sf_sub,
na.action = na.exclude
)
# seasonal only
mod2 <- gam(lnchl ~ s(doy, bs = 'cc'),
knots = list(doy = c(1, 366)),
data = sf_sub,
na.action = na.exclude
)
# annual and seasonal, additive
mod3 <- gam(lnchl ~ s(decyr, bs = 'tp') +
s(doy, bs = 'cc'),
knots = list(doy = c(1, 366)),
data = sf_sub,
na.action = na.exclude
)
# annual and seasonal, interaction
mod4 <- gam(lnchl ~ te(decyr, doy, bs = c('tp', 'cc')),
knots = list(doy = c(1, 366)),
data = sf_sub,
na.action = na.exclude
)
Summary stats of annual, seasonal models:
mods <- list(
mod1 = mod1,
mod2 = mod2,
mod3 = mod3,
mod4 = mod4
)
# smoother stats of GAMs
map(mods, ~ summary(.x)$s.table %>% data.frame %>% rownames_to_column('smoother')) %>%
enframe %>%
unnest %>%
kable
| name | smoother | edf | Ref.df | F | p.value |
|---|---|---|---|---|---|
| mod1 | s(decyr) | 8.212548 | 8.830184 | 32.55938 | 0 |
| mod2 | s(doy) | 7.083832 | 8.000000 | 16.06665 | 0 |
| mod3 | s(decyr) | 8.394141 | 8.896837 | 41.39759 | 0 |
| mod3 | s(doy) | 7.393221 | 8.000000 | 24.13289 | 0 |
| mod4 | te(decyr,doy) | 16.859549 | 18.456279 | 25.60994 | 0 |
# summary stats of GAMs
map(mods, ~ data.frame(
AIC = AIC(.x),
R2 = summary(.x)$r.sq)) %>%
enframe %>%
unnest %>%
kable
| name | AIC | R2 |
|---|---|---|
| mod1 | 1188.287 | 0.3406925 |
| mod2 | 1304.711 | 0.1845578 |
| mod3 | 1030.524 | 0.5109149 |
| mod4 | 1083.738 | 0.4625216 |
# prediction data
pred_dat <- data.frame(
decyr = seq(min(sf_sub$decyr), max(sf_sub$decyr), length = 1000)
) %>%
mutate(
Date = date_decimal(decyr),
Date = as.Date(Date),
mo = month(Date, label = TRUE),
doy = yday(Date),
yr = year(Date)
)
# predictions
sf_res <- pred_dat %>%
mutate(
mod1 = predict(mod1, newdata = pred_dat),
mod2 = predict(mod2, newdata = pred_dat),
mod3 = predict(mod3, newdata = pred_dat),
mod4 = predict(mod4, newdata = pred_dat)
) %>%
tidyr::gather('mods', 'pred', -Date, -mo, -doy, -decyr, -yr)
# plot
p <- ggplot(sf_res, aes(x = Date)) +
geom_point(data = sf_sub, aes(y = lnchl), size = 0.5) +
geom_line(aes(y = pred, colour = mods)) +
theme_bw() +
theme(
legend.position = 'top',
legend.title = element_blank()
)
ggplotly(p)
# plot
p <- ggplot(sf_res, aes(x = doy, group = factor(yr), colour = yr)) +
geom_line(aes(y = pred)) +
theme_bw() +
theme(
legend.position = 'top',
legend.title = element_blank()
) +
facet_wrap(~ mods, ncol = 2)
ggplotly(p)
Adding flow data to the model:
# model using a tensor product smooth for interactions
modall <- gam(lnchl ~ te(decyr, doy, lnQ, bs = c('tp', 'cc', 'tp')),
knots = list(doy = c(1, 366)),
data = sf_sub,
na.action = na.exclude
)